# -*- coding: utf-8 -*-

# Define here the models for your scraped items
#
# See documentation in:
# https://doc.scrapy.org/en/latest/topics/items.html

import scrapy

# Set up scraping of data from GitHub
from scrapy.loader import ItemLoader
from scrapy.loader.processors import MapCompose, TakeFirst, Join
from w3lib.html import remove_tags


class GitHubItem(scrapy.Item):
    """Scrapy item with a single ``title`` field, used for GitHub data."""
    # The only declared field; what the title refers to is decided by the
    # spider that fills it (not visible here).
    title = scrapy.Field()


# 51cto
class CtospiderItem(scrapy.Item):
    """Scrapy item for the 51cto spider: a title plus a related URL."""
    title = scrapy.Field()
    # Presumably the link the title points to -- verify against the spider.
    title_url = scrapy.Field()


# jobblog
def filter_val(value):
    """Pass ``value`` through unless it is a purely numeric string.

    Returns ``None`` when ``value.isdigit()`` is true, otherwise returns
    ``value`` unchanged. The empty string is returned as-is because
    ``"".isdigit()`` is false.
    """
    is_numeric = value.isdigit()
    return None if is_numeric else value


class JobblotItemLoader(ItemLoader):
    """Item loader that cleans the ``name`` and ``content`` fields.

    ``ItemLoader`` resolves per-field processors from ``<field>_in`` /
    ``<field>_out`` attributes on the loader, or from the metadata of the
    fields declared on the *item*. ``scrapy.Field`` attributes declared on
    a loader class are not consulted, so the processors are exposed here
    under the attribute names the loader actually reads.
    """

    # name: strip HTML tags, then drop purely numeric fragments
    # (filter_val returns None for those), and keep the first remaining value.
    name_in = MapCompose(remove_tags, filter_val)
    name_out = TakeFirst()

    # content: strip HTML tags and merge all fragments using Join()'s
    # default separator.
    content_in = MapCompose(remove_tags)
    content_out = Join()

    # Original Field declarations, kept so that any code reading
    # ``JobblotItemLoader.name`` / ``.content`` keeps working. They reuse
    # the processors defined above.
    name = scrapy.Field(
        input_processor=name_in,
        output_processor=name_out
    )
    content = scrapy.Field(
        input_processor=content_in,
        output_processor=content_out
    )
